1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
| from flask import Flask import requests from time import sleep import jieba.analyse from html2text import html2text from PIL import Image, ImageSequence import numpy as np import matplotlib.pyplot as plt from wordcloud import WordCloud, ImageColorGenerator from collections import OrderedDict from flask import render_template, request
# Flask application object; the `index` view below is registered on it via @app.route.
app = Flask(__name__)
def get_user_info(page, uid='1669879400'):
    """Fetch one page of a Weibo feed and summarise the posting user's profile.

    The profile fields are read from the ``user`` object attached to the
    second card (``cards[1]``) of the response.

    Args:
        page: Page number placed in the ``page`` query parameter.
        uid: Weibo user id to query. Defaults to the id that used to be
            hard-coded in the URL, so ``get_user_info(page)`` issues exactly
            the same request as before.

    Returns:
        dict with the keys ``name``, ``gender`` ('男', '女' or '未知'),
        ``description``, ``follow_count``, ``followers_count``,
        ``profile_image_url``, ``verified_reason`` and ``containerid``.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        KeyError, IndexError: if the response lacks the expected structure
            (fewer than two cards, or a card without ``mblog``/``user``).
    """
    # The original URL used containerid 1076031669879400 for uid 1669879400,
    # i.e. the uid prefixed with 107603.
    # NOTE(review): assumed to hold for other accounts as well - confirm.
    container_id = '107603{}'.format(uid)
    url = (
        'https://m.weibo.cn/api/container/getIndex?uid={}'
        '&luicode=10000011&lfid=1076031669879400&featurecode=20000320'
        '&type=all&containerid={}&page={}'
    ).format(uid, container_id, page)

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    json_data = response.json()  # parse once; no debug dump of the payload

    user = json_data['cards'][1]['mblog']['user']
    gender = {'m': '男', 'f': '女'}.get(user['gender'], '未知')

    return {
        'name': user['screen_name'],
        'gender': gender,
        'description': user['description'],
        'follow_count': user['follow_count'],
        'followers_count': user['followers_count'],
        'profile_image_url': user['profile_image_url'],
        'verified_reason': user['verified_reason'],
        'containerid': json_data['cardlistInfo']['containerid'],
    }
def get_all_post(uid, containerid, pages):
    """Collect the ``text`` of every post in a Weibo container, page by page.

    Pages are requested starting at 0 and incremented by one until a
    response carries no cards. The function sleeps 0.5 s between requests.

    Args:
        uid: Weibo user id placed in the ``uid`` query parameter.
        containerid: Container id placed in the ``containerid`` query
            parameter.
        pages: Not used; kept so existing callers keep working.
            NOTE(review): the name suggests a page limit, but the loop has
            never honoured one - decide whether it should.

    Returns:
        list of the ``mblog['text']`` values of all cards seen, in the order
        they were returned.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    # str.format instead of "+" so a non-string uid/containerid also works.
    url_template = (
        'https://m.weibo.cn/api/container/getIndex?uid={}'
        '&luicode=10000011&lfid=1076031669879400&featurecode=20000320'
        '&type=all&containerid={}&page={}'
    )
    posts = []
    page = 0
    while True:
        # Without a timeout a stalled connection would hang the crawl.
        response = requests.get(
            url_template.format(uid, containerid, page), timeout=10)
        response.raise_for_status()

        # A missing or empty card list both mean there is nothing more to read.
        cards = response.json().get('cards')
        if not cards:
            break

        for card in cards:
            # Skip cards that carry no post body instead of raising KeyError.
            mblog = card.get('mblog')
            if mblog and 'text' in mblog:
                posts.append(mblog['text'])

        sleep(0.5)  # pause between consecutive page requests
        page += 1
    return posts
def generate_personas(uid, data_list):
    """Build a keyword word cloud from a user's posts and save it as a PNG.

    Each post is converted with ``html2text``, the joined text is ranked with
    jieba's TextRank (top 1000 keywords with weights), and the weights drive
    a word cloud that is shaped and coloured by ``./resourse/dili.png``.

    Args:
        uid: Used only to name the output file.
        data_list: Iterable of post bodies as accepted by ``html2text``.

    Returns:
        The path of the written image, ``'./resourse/<uid>.png'``.
    """
    content = '\n'.join(html2text(post) for post in data_list)

    ranked = jieba.analyse.textrank(content, topK=1000, withWeight=True)
    # Each entry is indexed as (keyword, weight).
    keywords = {entry[0]: entry[1] for entry in ranked}

    # Context manager closes the mask file once its pixels are copied out.
    with Image.open('./resourse/dili.png') as image:
        graph = np.array(image)

    wc = WordCloud(font_path='./resourse/simhei.ttf',
                   background_color='white',
                   max_words=300,
                   mask=graph)
    wc.generate_from_frequencies(keywords)
    image_color = ImageColorGenerator(graph)

    dest_img = './resourse/{}.png'.format(uid)

    # Save without calling plt.show(): show() would block the caller until a
    # window is closed, and saving after it can write an empty canvas.
    figure = plt.figure()
    try:
        plt.imshow(wc.recolor(color_func=image_color))
        plt.axis("off")
        plt.savefig(dest_img)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(figure)
    return dest_img
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the landing page, with profile data and a word cloud on POST.

    On a POST whose ``uid`` form field consists only of ASCII digits, the
    user info, all posts and the generated word-cloud image path are
    collected and passed to the template. On GET, or when ``uid`` is missing
    or malformed, the template is rendered with no context variables.

    Returns:
        The rendered ``index.html`` template.
    """
    userinfo = {}

    uid = ''
    if request.method == 'POST':
        uid = (request.form.get('uid') or '').strip()

    # The uid ends up in a request URL and in an output file path, so accept
    # digits only; this blocks path traversal and query-string injection.
    if uid and set(uid) <= set('0123456789'):
        page = 0
        # NOTE(review): get_user_info is called with only the page number, so
        # the profile comes from the account fixed in its request URL, not
        # necessarily from the submitted uid.
        userinfo = get_user_info(page)
        posts = get_all_post(uid, userinfo['containerid'], page)
        userinfo['personas'] = generate_personas(uid, posts)

    return render_template('index.html', **userinfo)
# Start the Flask server with its default settings only when this file is run directly.
if __name__ == '__main__': app.run()
|